
setwd("/Users/bethanyshockley/Documents/Spring 2020/Research/naturalization/")

sink("PSRM R_log.txt", append=F, type=c("output")) 

library(foreign)
library(cjoint)
library(car)


data = read.dta("PSRM replication priorities.dta")
attach(data)

# variable names
names(data)

#########################################################
# name stuff
################

names(data) <- sub("^female$", "Gender", names(data))
names(data) <- sub("^approval$", "Approval", names(data))


# Gender

data$`Gender` <- factor(data$`Gender`, levels = c(1,2),
                        labels = c("Male","Female")) 
data <- within (data, `Gender` <- relevel(`Gender`, ref = 1))
summary(data$`Gender`)

# Children of Qatari mothers

data$children_1 <- factor(data$children_1, levels = c(1,2),
                          labels = c("Not Mentioned","Mentioned")) 
data <- within (data, children_1 <- relevel(children_1, ref = 1))
summary(data$children_1)

# Born in Qatar

data$born_1 <- factor(data$born_1, levels = c(1,2),
                      labels = c("Not Mentioned","Mentioned")) 
data <- within (data, born_1 <- relevel(born_1, ref = 1))
summary(data$born_1)

# Non-Qataris

data$nonqatari_1 <- factor(data$nonqatari_1, levels = c(1,2),
                           labels = c("Not Mentioned","Mentioned")) 
data <- within (data, nonqatari_1 <- relevel(nonqatari_1, ref = 1))
summary(data$nonqatari_1)

# Special skills

data$skills_1 <- factor(data$skills_1, levels = c(1,2),
                        labels = c("Not Mentioned","Mentioned")) 
data <- within (data, skills_1 <- relevel(skills_1, ref = 1))
summary(data$skills_1)

# Special service

data$service_1 <- factor(data$service_1, levels = c(1,2),
                         labels = c("Not Mentioned","Mentioned")) 
data <- within (data, service_1 <- relevel(service_1, ref = 1))
summary(data$service_1) 

######### NO NEED FOR MAKE DESIGN BECAUSE NO CONSTRAINTS ###############


############## Figure 2: Treatment Effects for Immigrant Groups ##################
###########################################################
###This is the coefficent Children of Qatari mothers in Table A.2 and model 1 of A.4
###########################################################

summary(data$selected)

children <- amce(selected ~ children_1, data=data, design="uniform",cluster=TRUE, respondent.id="caseid", na.ignore=TRUE)

summary.amce(children)

############## BORN IN QATAR ###########
###########################################################
###This is the coefficient for born in Qatar in Table A.2 and model 1 of A.4
###########################################################

summary(data$selected)

born <- amce(selected ~ born_1, data=data, design="uniform",cluster=TRUE, respondent.id="caseid", na.ignore=TRUE)

summary.amce(born)

############## NON-QATARI TRIBES ###########
###########################################################
###This is the coefficient for "From non-Qatari tribes" in Table A.2 and model 1 of A.4
###########################################################

summary(data$selected)

nonqatari <- amce(selected ~ nonqatari_1, data=data, design="uniform",cluster=TRUE, respondent.id="caseid", na.ignore=TRUE)

summary.amce(nonqatari)

############## SPECIAL SKILLS ###########
###########################################################
###This is the coefficient for "Special skills" in Table A.2 and model 1 of A.4
###########################################################

summary(data$selected)

skills <- amce(selected ~ skills_1, data=data, design="uniform",cluster=TRUE, respondent.id="caseid", na.ignore=TRUE)

summary.amce(skills)

############## SPECIAL SERVICE ############
###########################################################
###This is the coefficient for "military service" in Table A.2 and model 1 of A.4
###########################################################
summary(data$selected)

service <- amce(selected ~ service_1, data=data, design="uniform",cluster=TRUE, respondent.id="caseid", na.ignore=TRUE)

summary.amce(service)


###### USE ABOVE FOR APPENDIX A.2 ############################


############################ PLOTTING EVERYTHING TOGETHER FOR FIGURE 2 #######################

childrenFrame <- data.frame(Variable = summary(children)$amce[, 2],
                            Coefficient = summary(children)$amce[, 3],
                            SE = summary(children)$amce[, 4],
                            modelName = "Children of Qatari mothers")
head(childrenFrame)
bornFrame <- data.frame(Variable = summary(born)$amce[, 2],
                        Coefficient = summary(born)$amce[, 3],
                        SE = summary(born)$amce[, 4],
                        modelName = "Born in Qatar")
head(bornFrame)
nonqatariFrame <- data.frame(Variable = summary(nonqatari)$amce[, 2],
                             Coefficient = summary(nonqatari)$amce[, 3],
                             SE = summary(nonqatari)$amce[, 4],
                             modelName = "From non-Qatari tribes")
head(nonqatariFrame)
skillsFrame <- data.frame(Variable = summary(skills)$amce[, 2],
                          Coefficient = summary(skills)$amce[, 3],
                          SE = summary(skills)$amce[, 4],
                          modelName = "Special skills")
head(skillsFrame)
serviceFrame <- data.frame(Variable = summary(service)$amce[, 2],
                           Coefficient = summary(service)$amce[, 3],
                           SE = summary(service)$amce[, 4],
                           modelName = "Military service")
head(serviceFrame)
# Combine these data.frames
PrioritiesIndvFrame2 <- data.frame(rbind(nonqatariFrame, serviceFrame, skillsFrame, bornFrame, childrenFrame)) 
head(PrioritiesIndvFrame2)

interval1 <- -qnorm((1-0.9)/2)  # 90% multiplier
interval2 <- -qnorm((1-0.95)/2)  # 95% multiplier

pdf("figure2.pdf")

# Overall plot without subgroups

overallplot <- ggplot(PrioritiesIndvFrame2)
overallplot <- overallplot + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
overallplot <- overallplot + geom_linerange(aes(x = modelName, ymin = Coefficient - SE*interval1,
                                                ymax = Coefficient + SE*interval1),
                                            lwd = 1, position = position_dodge(width = 1/2))
overallplot <- overallplot + geom_pointrange(aes(x = modelName, y = Coefficient, ymin = Coefficient - SE*interval2,
                                                 ymax = Coefficient + SE*interval2),
                                             lwd = 1/2, position = position_dodge(width = 1/2),
                                             shape = 21, fill = "WHITE")
overallplot <- overallplot + scale_y_discrete(limits=c(-0.30,-0.20,-0.10,0.00, 0.10, 0.20, 0.30), expand=c(0.05,0.05))
overallplot <- overallplot + coord_flip() + theme_bw()
overallplot <- overallplot + ggtitle("")
overallplot <- overallplot + xlab("")
overallplot <- overallplot + ylab("Change in Probability of Selection")
overallplot <- overallplot + theme(legend.position="none" , axis.text=element_text(size=16) , axis.title=element_text(size=16))
print(overallplot)
dev.off()

#######################################################################################################################
###Appendix Table A.4 and Figure A.1
#######################################################################################################################
###All models should be subset by children_1="Not Mentioned"

############## BORN IN QATAR ###########
###########################################################
###This is the coefficient for "born in Qatar" in model 2 of A.4
###########################################################

summary(data$selected)

born_sub <- amce(selected ~ born_1, data=data, design="uniform",cluster=TRUE, subset=data$children_1=="Not Mentioned",  respondent.id="caseid", na.ignore=TRUE)

summary.amce(born_sub)

############## NON-QATARI TRIBES ###########
###########################################################
###This is the coefficient for "non-Qatari tribes" in model 2 of A.4
###########################################################
summary(data$selected)

nonqatari_sub <- amce(selected ~ nonqatari_1, data=data, design="uniform",cluster=TRUE, subset=data$children_1=="Not Mentioned", respondent.id="caseid", na.ignore=TRUE)

summary.amce(nonqatari_sub)

############## SPECIAL SKILLS ###########
###########################################################
###This is the coefficient for "special skills" in model 2 of A.4
###########################################################

summary(data$selected)

skills_sub <- amce(selected ~ skills_1, data=data, design="uniform",cluster=TRUE,  subset=data$children_1=="Not Mentioned", respondent.id="caseid", na.ignore=TRUE)

summary.amce(skills_sub)

############## SPECIAL SERVICE ############
###########################################################
###This is the coefficient for "military service" in model 2 of A.4
###########################################################

summary(data$selected)

service_sub <- amce(selected ~ service_1, data=data, design="uniform",cluster=TRUE, subset=data$children_1=="Not Mentioned", respondent.id="caseid", na.ignore=TRUE)

summary.amce(service_sub)

###Checks
summary(data$children_1)

####Putting it all together

############################ PLOTTING EVERYTHING TOGETHER #######################



bornFrame <- data.frame(Variable = summary(born_sub)$amce[, 2],
                        Coefficient = summary(born_sub)$amce[, 3],
                        SE = summary(born_sub)$amce[, 4],
                        modelName = "Born in Qatar")
head(bornFrame)
nonqatariFrame <- data.frame(Variable = summary(nonqatari_sub)$amce[, 2],
                             Coefficient = summary(nonqatari_sub)$amce[, 3],
                             SE = summary(nonqatari_sub)$amce[, 4],
                             modelName = "From non-Qatari tribes")
head(nonqatariFrame)
skillsFrame <- data.frame(Variable = summary(skills_sub)$amce[, 2],
                          Coefficient = summary(skills_sub)$amce[, 3],
                          SE = summary(skills_sub)$amce[, 4],
                          modelName = "Special skills")
head(skillsFrame)
serviceFrame <- data.frame(Variable = summary(service_sub)$amce[, 2],
                           Coefficient = summary(service_sub)$amce[, 3],
                           SE = summary(service_sub)$amce[, 4],
                           modelName = "Military service")
head(serviceFrame)
# Combine these data.frames
PrioritiesIndvFrame3 <- data.frame(rbind(nonqatariFrame, serviceFrame, skillsFrame, bornFrame)) 
head(PrioritiesIndvFrame3)

interval1 <- -qnorm((1-0.9)/2)  # 90% multiplier
interval2 <- -qnorm((1-0.95)/2)  # 95% multiplier

# Overall plot without subgroups

pdf("figurea1.pdf")

overallplot2 <- ggplot(PrioritiesIndvFrame3, aes(colour=modelName))
overallplot2 <- overallplot2 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
overallplot2 <- overallplot2 + geom_linerange(aes(x = modelName, ymin = Coefficient - SE*interval1,
                                                ymax = Coefficient + SE*interval1),
                                            lwd = 1, position = position_dodge(width = 1/2))
overallplot2 <- overallplot2 + geom_pointrange(aes(x = modelName, y = Coefficient, ymin = Coefficient - SE*interval2,
                                                 ymax = Coefficient + SE*interval2),
                                             lwd = 1/2, position = position_dodge(width = 1/2),
                                             shape = 21, fill = "WHITE")
overallplot2 <- overallplot2 + scale_y_discrete(limits=c(-0.30,-0.20,-0.10,0.00, 0.10, 0.20, 0.30), expand=c(0.05,0.05))
overallplot2 <- overallplot2 + coord_flip() + theme_bw()
overallplot2 <- overallplot2 + ggtitle("Priority for Permanent Residence")
overallplot2 <- overallplot2 + labs(list(y= "Change in Probability of Selection" , x=""))
overallplot2 <- overallplot2 + theme(legend.position="none" , axis.text=element_text(size=16) , axis.title=element_text(size=16))
print(overallplot2)  
dev.off()

###################################################################################################
###Figure 3: Average Marginal Component Effects for Immigrant Attributes
###################################################################################################

##Figures 4 and 5 generated in STATA

detach(data)
citizenc <- read.dta("PSRM conjoint replication.dta")
attach(citizenc)
names(citizenc)

###Setting up the IVs
#####################################################################

#baseline Sunni
citizenc$namecand<- factor(citizenc$namecand, levels = c(1,2,3,4),
                           labels = c("Christian" ,"Sunni" , "Shia", "Not mentioned")) 
citizenc <- within(citizenc, namecand <- relevel(namecand, ref = 2))

#baseline Yemeni
citizenc$natcand<- factor(citizenc$natcand, levels = c(1,2,3,4,5),
                          labels = c("British" ,"Indian", "Palestinian", "Egyptian", "Yemeni")) 
citizenc <- within(citizenc, natcand <- relevel(natcand, ref = 5))

#baseline teacher
citizenc$jobcand<- factor(citizenc$jobcand, levels = c(1,2,3,4),
                          labels = c("Doctor" ,"Teacher", "Engineer", "Military")) 
citizenc <- within(citizenc, jobcand <- relevel(jobcand, ref = 2))

#baseline 20K
citizenc$salarycand<- factor(citizenc$salarycand, levels = c(1,2,3),
                             labels = c("20K" ,"40K" , "60K")) 
citizenc <- within(citizenc, salarycand <- relevel(salarycand, ref = 1))

#baseline less than 1 year
citizenc$timecand<- factor(citizenc$timecand, levels = c(1,2,3,4),
                           labels = c("Less than 1 year" ,"10 years" , "20 years" , "Born in Qatar")) 
citizenc <- within(citizenc, timecand <- relevel(timecand, ref = 1))

#baseline Arabic
citizenc$langcand<- factor(citizenc$langcand, levels = c(1,2,3),
                           labels = c("Arabic" ,"English" , "Arabic and English")) 
citizenc <- within(citizenc, langcand <- relevel(langcand, ref = 1))

#Respondent varying income
citizenc$income<- factor(citizenc$income, levels = c(1,2),
                         labels = c("Less than 40K" ,"More than 40K")) 
citizenc <- within(citizenc, income <- relevel(income, ref = 1))
summary(citizenc$income)


###########################################################################
###These are the coefficients and p-values for Table A.4
###########################################################################

#Model with interaction for respondent income
select_income <- amce(selected ~ namecand + natcand + timecand + salarycand + jobcand + langcand + namecand:income + natcand:income + langcand:income + timecand:income + salarycand:income + jobcand:income + income , data=citizenc, subset=!is.na(citizenc$income), respondent.varying="income", respondent.id="caseid", weights="wgt", cluster=TRUE, na.ignore=TRUE)
summary(select_income)

####FIGURE 3 #################
pdf("figure3.pdf")
plot.amce(select_income , xlab="Change in Probability of Selection" , text.size = 8, facet.names=NULL, colors=c("black") , attribute.names=c("Occupation", "Language", "Religion", "Nationality" , "Salary" , "Time in Country" ))
dev.off()
sink()